/*
 * TOP SECRET Copyright 2006-2015 Transsion.com All right reserved. This software is the confidential and proprietary
 * information of Transsion.com ("Confidential Information"). You shall not disclose such Confidential Information and
 * shall use it only in accordance with the terms of the license agreement you entered into with Transsion.com.
 */
package com.yunji.framework_template.biz.crawler.tz;

import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import javax.annotation.Resource;

import org.springframework.stereotype.Service;

import com.yunji.framework_template.biz.crawler.ContentImageHandler;
import com.yunji.framework_template.biz.crawler.NewsCrawler;
import com.yunji.framework_template.biz.crawler.SourceType;
import com.yunji.framework_template.common.enumeration.CountryCode;
import com.yunji.framework_template.common.enumeration.NewsType;

/**
 * ClassName: ThecitizenNewsCrawler <br/>
 * Date: 2018-12-18 15:31:24 <br/>
 * <p>
 * {@link NewsCrawler} implementation for the site served under {@code thecitizen.co.tz}. It supplies the list of
 * source URLs, the filter that decides which discovered links are accepted, the image handler and the country
 * code set ({@code TZ}).
 *
 * @author fenglibin1982@163.com
 * @Blog http://blog.csdn.net/fenglibing
 */
@Service // Crawling this site currently reports a certificate problem; not handled for now.
public class ThecitizenNewsCrawler extends NewsCrawler {

    /** Domain a link's host must equal, or be a sub-domain of, to be accepted. */
    private static final String SITE_DOMAIN = "thecitizen.co.tz";

    /** File name that must appear in an accepted URL. */
    private static final String INDEX_PAGE = "index.html";

    @Resource
    private ThecitizenNewsImageHandler thecitizenNewsImageHandler;

    /**
     * Decides whether {@code url} is accepted by this crawler.
     * <p>
     * A URL is accepted only when all of the following hold:
     * <ul>
     * <li>it is non-blank, is not a {@code mailto}/{@code javascript} link and parses as a {@link URI};</li>
     * <li>its host is {@code thecitizen.co.tz} or a sub-domain of it (compared case-insensitively);</li>
     * <li>its path, split on {@code /}, has 5 or 6 parts (including the empty part before the leading slash) and
     * the part at index 2 (for 5 parts) or index 3 (for 6 parts) consists of more than two hyphen-separated
     * tokens;</li>
     * <li>the URL contains {@code index.html} at a position other than its very start.</li>
     * </ul>
     * NOTE(review): the hyphen rule appears to be what tells article pages (which carry a multi-word title
     * segment) apart from section pages - confirm against the live site.
     *
     * @param url the candidate link; may be {@code null}
     * @return {@code true} if the URL passes every check above, {@code false} otherwise (including for
     *         {@code null}, malformed, relative and opaque URLs)
     */
    @Override
    public boolean isOkUrl(String url) {
        if (url == null || url.trim().length() == 0) {
            return false;
        }
        if (startsWithIgnoreCase(url, "mailto") || startsWithIgnoreCase(url, "javascript")) {
            return false;
        }

        URI uri;
        try {
            uri = new URI(url);
        } catch (URISyntaxException ignored) {
            // A link that cannot be parsed is simply not a crawl candidate.
            return false;
        }

        // getHost() is null for relative links and opaque URIs; those can never belong to the site.
        if (!isSiteHost(uri.getHost())) {
            return false;
        }

        String path = uri.getPath();
        if (path == null) {
            return false;
        }

        String[] segments = path.split("/");
        int checkedIndex;
        if (segments.length == 5) {
            checkedIndex = 2;
        } else if (segments.length == 6) {
            checkedIndex = 3;
        } else {
            return false;
        }
        return segments[checkedIndex].split("-").length > 2 && url.indexOf(INDEX_PAGE) > 0;
    }

    /**
     * Returns the image handler injected into this crawler.
     *
     * @return the {@link ThecitizenNewsImageHandler} instance held by this bean
     */
    @Override
    public ContentImageHandler getContentImageHandler() {
        return thecitizenNewsImageHandler;
    }

    /**
     * Builds the list of source URLs for this site, each paired with a {@link NewsType}.
     * <p>
     * NOTE(review): several URLs are listed more than once with different news types, and the type attached to a
     * URL does not always match the section named in its path - confirm the mapping is intentional.
     *
     * @return a new mutable list, in declaration order
     */
    @Override
    public List<SourceType> getSourceTypeList() {
        List<SourceType> sourceTypeList = new ArrayList<SourceType>();
        sourceTypeList.add(seed("https://www.thecitizen.co.tz/1765046-1765046-2iga6hz/index.html", NewsType.HOT));
        sourceTypeList.add(seed("https://www.thecitizen.co.tz/News/1840340-1840340-ui3u0m/index.html", NewsType.EDUCATION));
        sourceTypeList.add(seed("https://www.thecitizen.co.tz/magazine/1840564-1840564-6mxwp8z/index.html", NewsType.VIDEO));
        sourceTypeList.add(seed("https://www.thecitizen.co.tz/oped/1840568-1840568-iiiqka/index.html", NewsType.IMAGE));
        sourceTypeList.add(seed("https://www.thecitizen.co.tz/photo/1846146-1846146-dogcluz/index.html", NewsType.SPORT));
        sourceTypeList.add(seed("https://www.thecitizen.co.tz/Video/2595322-2595322-10uwyk6z/index.html", NewsType.LIFESTYLE));
        sourceTypeList.add(seed("https://www.thecitizen.co.tz/data/2595284-2595284-s1mqdm/index.html", NewsType.FUN));
        sourceTypeList.add(seed("https://www.thecitizen.co.tz/jobs-tenders/1873946-1873946-dncqx3z/index.html", NewsType.HOUSE));
        sourceTypeList.add(seed("https://www.thecitizen.co.tz/News/1840340-1840340-ui3u0m/index.html", NewsType.ASKANDANSWER));
        sourceTypeList.add(seed("https://www.thecitizen.co.tz/News/Business/1840414-1840414-xmvnqvz/index.html", NewsType.HEALTH));
        sourceTypeList.add(seed("https://www.thecitizen.co.tz/News/entertainment/1840560-1840560-15r44wjz/index.html", NewsType.MILITARY));
        sourceTypeList.add(seed("https://www.thecitizen.co.tz/News/Sports/1840572-1840572-jaa5ilz/index.html", NewsType.CAR));
        sourceTypeList.add(seed("https://www.thecitizen.co.tz/News/Business/Turkish-Airlines-picks-up-best-business-class-award/1840414-4902480-j9y1ry/index.html", NewsType.TECH));
        sourceTypeList.add(seed("https://www.thecitizen.co.tz/News/Business/Moshi-provider-of-rescue-operations-gets-4-awards/1840414-4902472-nqpupj/index.html", NewsType.BUSINESS));
        sourceTypeList.add(seed("https://www.thecitizen.co.tz/News/Business/Maize-flour-prices-fall-marginally/1840414-4902452-ggwqtf/index.html", NewsType.BLOG));
        sourceTypeList.add(seed("https://www.thecitizen.co.tz/magazine/1840564-1840564-6mxwp8z/index.html", NewsType.MAN));
        sourceTypeList.add(seed("https://www.thecitizen.co.tz/magazine/politicalreforms/Africa-s-failed-vision--The-story-of-Thomas-Sankara---1/1843776-4902518-c0jye6z/index.html", NewsType.KIDS));
        sourceTypeList.add(seed("https://www.thecitizen.co.tz/magazine/politicalreforms/Psychosis-in-the-name-of-human-rights-needs-to-be/1843776-4902510-2ycoa1z/index.html", NewsType.POLITICS));
        sourceTypeList.add(seed("https://www.thecitizen.co.tz/magazine/politicalreforms/EU-s-resolution-on-Tanzania--Why-it-matters-to-our-sovereignty/1843776-4902502-n97yxrz/index.html", NewsType.EDUCATION));
        sourceTypeList.add(seed("https://www.thecitizen.co.tz/magazine/When-patients-turn-a-blind-eye-to-your-expert-advice/1840564-4902158-al772k/index.html", NewsType.IMAGE));
        sourceTypeList.add(seed("https://www.thecitizen.co.tz/oped/1840568-1840568-iiiqka/index.html", NewsType.SPORT));
        sourceTypeList.add(seed("https://www.thecitizen.co.tz/photo/1846146-1846146-dogcluz/index.html", NewsType.LIFESTYLE));
        sourceTypeList.add(seed("https://www.thecitizen.co.tz/Video/2595322-2595322-10uwyk6z/index.html", NewsType.FUN));
        sourceTypeList.add(seed("https://www.thecitizen.co.tz/data/2595284-2595284-s1mqdm/index.html", NewsType.HOUSE));
        sourceTypeList.add(seed("https://www.thecitizen.co.tz/jobs-tenders/1873946-1873946-dncqx3z/index.html", NewsType.ASKANDANSWER));

        return sourceTypeList;
    }

    /**
     * Returns the country codes associated with this crawler.
     *
     * @return a new mutable set containing only the name of {@link CountryCode#TZ}
     */
    @Override
    public Set<String> getCountryCodeSet() {
        Set<String> countryCodeSet = new HashSet<String>();
        countryCodeSet.add(CountryCode.TZ.name());
        return countryCodeSet;
    }

    /** Builds one {@link SourceType} entry from a URL and the news type attached to it. */
    private static SourceType seed(String url, NewsType newsType) {
        return SourceType.builder().url(url).newsType(newsType).build();
    }

    /** Case-insensitive, locale-independent {@code startsWith}. */
    private static boolean startsWithIgnoreCase(String text, String prefix) {
        return text.regionMatches(true, 0, prefix, 0, prefix.length());
    }

    /** Tells whether {@code host} is the site domain itself or one of its sub-domains; {@code null} is rejected. */
    private static boolean isSiteHost(String host) {
        if (host == null) {
            return false;
        }
        int offset = host.length() - SITE_DOMAIN.length();
        if (offset < 0 || !host.regionMatches(true, offset, SITE_DOMAIN, 0, SITE_DOMAIN.length())) {
            return false;
        }
        // Require a label boundary so that e.g. "notthecitizen.co.tz" is not mistaken for the site.
        return offset == 0 || host.charAt(offset - 1) == '.';
    }
}
